#Replication for Generalizability of IR experiments beyond the U.S.
#Produce Heatmap (Figure 1) -- geographical distribution of IR experiments

# load all relevant themes and packages-------

# The workspace is deliberately not cleared here: rm(list = ls()) removes only
# objects from the global environment (loaded packages and options stay as
# they are) and would wipe the workspace of anyone who sources this file.
# Run the script in a fresh R session instead.
library("tidyverse")
library("ggplot2")
library("RColorBrewer")
library("ggthemes")
library("countrycode")

# Read data-------

# Study-level input; the steps below use its `country` and `year` columns
# (presumably one row per published experiment -- confirm against the file).
data <- read.csv("data/journal_experiments/studies.csv")

# Number of rows per country and year. Rows with a missing country are dropped
# before counting, and count() returns an ungrouped result, so no grouping is
# carried into later steps. The output columns are country, year and count.
cntry_sum <- data %>%
  drop_na(country) %>%
  count(country, year, name = "count") %>%
  as.data.frame()

#add all country names
# UN countries by continent; this script reads the `Country` and `Continent`
# columns of the file.
un_country <- read.csv("data/journal_experiments/un_country.csv")
un_country$country <- un_country$Country  # lower-case copy used as merge key
countries <- un_country$country

# Balanced country-year grid: every listed country paired with every year of
# the study window. The repetition counts are derived from the data rather
# than hard-coded, so the grid stays valid if the country list changes length.
years <- 2001:2020
country <- rep(countries, each = length(years))
year <- rep(years, times = length(countries))

df <- data.frame(country, year)

# Give both merge keys the same type so merge() matches years reliably.
df$year <- as.numeric(df$year)
cntry_sum$year <- as.numeric(cntry_sum$year)

# Left join the observed counts onto the full grid; country-years without any
# study come back as NA and are recorded as zero studies.
df2 <- merge(df, cntry_sum, by = c("country", "year"), all.x = TRUE)
df2$count[is.na(df2$count)] <- 0

# Attach the continent of each country.
df2 <- merge(df2, un_country, by = "country", all.x = TRUE, all.y = FALSE)

# Sort by continent, then country, and store that order in the factor levels
# of `country` so the plot axis follows it. (Assigning levels() to the data
# frame itself only sets an unused attribute and orders nothing.)
df2 <- df2[order(df2$Continent, df2$country), ]
df2$country <- factor(df2$country, levels = unique(df2$country))

# Years are treated as discrete categories on the plot axis.
df2$year <- as.character(df2$year)

#Create figure-------

# Heatmap of study counts: countries on the x axis, years on the y axis, one
# facet column per continent with facet widths proportional to the number of
# countries shown.
p <- ggplot(df2, aes(x = country, y = year, fill = count)) +
  geom_tile() +
  facet_grid(cols = vars(Continent), scales = "free", space = "free_x") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  # Colour stops: white at 0, light gray at 1, black at the top of the scale.
  # NOTE(review): the 45 looks like the largest count in the data; the light
  # gray stop only lands exactly on a count of 1 if that holds -- confirm.
  scale_fill_gradientn(
    colours = c("white", "lightgray", "black"),
    values = scales::rescale(c(0, 1, 45))
  ) +
  labs(x = "", y = "", fill = "Studies")
p

# Pass the plot explicitly instead of relying on ggplot2's "last plot" state.
ggsave("figures/main_text/heatmap.pdf", plot = p, width = 23, height = 10)
